# Install readxl only when it is missing, so that re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
Pytania badawcze:
Wczytujemy dane z pliku csv
# Load the listings from the CSV file. read.csv2() is the variant of
# read.csv() whose defaults are ";" as field separator and "," as decimal mark.
rent <- read.csv2("../data-raw/rent-poznan.csv")
# Preview the first rows.
head(rent)
Wczytamy dane z Excela
# Read the listings from the Excel workbook and convert the result of
# read_excel() to a plain data.frame in one step, then preview the first rows.
rent <- as.data.frame(read_excel(path = "../data-raw/rent-poznan.xlsx"))
head(rent)
Wybieramy kolumny ze zbioru rent
rent[, 1] # in a data.frame, selecting a single column yields a vector
[1] "19576742" "20292499" "25876297" "30574847" "30774505" "31073093" "31142157" "31238621" "31787215"
[10] "32732377" "32735125" "32737325" "32739025" "33197305" "33603847" "34142495" "34353551" "34387075"
[19] "34603491" "34674153" "35498262" "35614352" "35697526" "35803750" "35809836" "35838188" "35859230"
[28] "35877716" "36022372" "36108930" "36140878" "36306458" "36394558" "36506902" "36508920" "36634154"
[37] "36679966" "36684056" "36706458" "36781616" "36938970" "36960066" "37054002" "37413562" "37413604"
[46] "37447450" "37467842" "37759632" "37769632" "37874316" "37978832" "38032336" "38207096" "38473604"
[55] "38535178" "38643674" "38846288" "38887054" "38992724" "39018820" "39150724" "39238722" "39258710"
[64] "39415476" "39487112" "39525908" "39585954" "39659156" "39671614" "39709748" "39749960" "39819436"
[73] "39836910" "39874374" "39876904" "39982726" "40110596" "40157932" "40307840" "40345152" "40349761"
[82] "40407741" "40514743" "40521895" "40588427" "40651069" "40686301" "40689573" "40706785" "40718147"
[91] "40760747" "40765773" "40782409" "40829297" "40833711" "40846685" "40851365" "40858679" "40862067"
[100] "40867671" "40876079" "40916831" "40929957" "40999579" "40999581" "41065535" "41080707" "41107665"
[109] "41135499" "41154065" "41189175" "41195733" "41200665" "41259217" "41314501" "41384625" "41599391"
[118] "41608879" "41690557" "41713611" "41755263" "41800921" "41950159" "41960421" "41960693" "41983539"
[127] "42007649" "42090221" "42150409" "42158463" "42214479" "42222033" "42226313" "42231861" "42235235"
[136] "42243651" "42268761" "42298337" "42314295" "42339715" "42384859" "42485851" "42595097" "42627047"
[145] "42658889" "42698649" "42703639" "42729723" "42739025" "42800313" "42800751" "42805623" "42812985"
[154] "42850703" "42856493" "42915723" "42981347" "42982971" "43024953" "43027549" "43027993" "43087891"
[163] "43113203" "43116185" "43122383" "43165963" "43172691" "43188483" "43203785" "43214193" "43220477"
[172] "43234277" "43305573" "43316837" "43316839" "43332723" "43333663" "43340707" "43348479" "43394919"
[181] "43395411" "43438467" "43481943" "43547165" "43548273" "43573143" "43601551" "43605709" "43642531"
[190] "43661397" "43676621" "43686853" "43693897" "43763469" "43763507" "43766355" "43777507" "43787485"
[199] "43815105" "43828159" "43831911" "43897690" "43897966" "43929608" "43936564" "43940350" "43990250"
[208] "43991830" "43997650" "44006908" "44014108" "44025192" "44049926" "44060522" "44083368" "44083786"
[217] "44087540" "44103206" "44126668" "44151650" "44171988" "44172292" "44251100" "44293700" "44304322"
[226] "44351882" "44385390" "44393654" "44398400" "44419392" "44434124" "44435450" "44443794" "44592932"
[235] "44655882" "44681384" "44694664" "44694982" "44700204" "44704618" "44715976" "44741178" "44750014"
[244] "44778422" "44787744" "44802040" "44836994" "44843034" "44848056" "44872030" "44872956" "44880112"
[253] "44880190" "44897338" "44916652" "44988856" "44999100" "44999486" "45027940" "45029258" "45045524"
[262] "45052160" "45089994" "45113662" "45115052" "45125050" "45129264" "45129604" "45153524" "45162402"
[271] "45169752" "45179856" "45188848" "45198124" "45241186" "45258448" "45269034" "45273822" "45288018"
[280] "45294340" "45295738" "45312868" "45324284" "45335880" "45336054" "45337888" "45351114" "45396816"
[289] "45411050" "45421094" "45440942" "45441086" "45485790" "45493294" "45499048" "45504854" "45506746"
[298] "45536106" "45589606" "45613076" "45620290" "45641308" "45708170" "45719928" "45721026" "45721042"
[307] "45733068" "45751084" "45757586" "45770882" "45776810" "45785730" "45889972" "45893808" "45914984"
[316] "46075546" "46102906" "46105380" "46123846" "46130316" "46137814" "46142106" "46142250" "46180134"
[325] "46228438" "46231138" "46242236" "46271182" "46279330" "46339548" "46362304" "46386354" "46399510"
[334] "46407982" "46411700" "46483384" "46493380" "46494664" "46535186" "46558528" "46622082" "46666600"
[343] "46682278" "46720162" "46727282" "46739384" "46754618" "46755266" "46813994" "46817474" "46833142"
[352] "46875122" "46877054" "46880614" "46883766" "46910770" "46928378" "46931846" "47031250" "47032176"
[361] "47035324" "47048872" "47070620" "47070802" "47073060" "47083158" "47133406" "47139598" "47158528"
[370] "47187834" "47200596" "47203828" "47210238" "47217488" "47218380" "47220328" "47224278" "47237670"
[379] "47240280" "47257522" "47266048" "47266550" "47274980" "47296062" "47335030" "47352248" "47364260"
[388] "47364992" "47367776" "47386890" "47396280" "47404764" "47439708" "47474448" "47489864" "47508898"
[397] "47518338" "47530080" "47539050" "47564930" "47577996" "47651096" "47665428" "47680896" "47686508"
[406] "47714048" "47758798" "47763142" "47774320" "47782626" "47791546" "47810292" "47826532" "47860328"
[415] "47876950" "47901976" "47906880" "47975342" "47989888" "48004288" "48093240" "48102926" "48120778"
[424] "48129666" "48138024" "48141012" "48141092" "48143212" "48147962" "48188760" "48195860" "48198326"
[433] "48214012" "48218528" "48223278" "48225508" "48225540" "48243796" "48271026" "48305096" "48339770"
[442] "48343388" "48344886" "48354498" "48359386" "48373920" "48405764" "48406654" "48433752" "48460846"
[451] "48466668" "48467564" "48485520" "48495012" "48528176" "48532654" "48545388" "48557344" "48565074"
[460] "48574706" "48575592" "48606970" "48611346" "48624316" "48634096" "48634442" "48648010" "48652250"
[469] "48658044" "48667934" "48682484" "48689616" "48690626" "48704622" "48707684" "48717872" "48718226"
[478] "48735494" "48743792" "48751364" "48770192" "48786324" "48802508" "48824982" "48826946" "48827676"
[487] "48828088" "48828234" "48854202" "48861346" "48886382" "48889162" "48960630" "48960884" "48969696"
[496] "48969774" "49010106" "49030336" "49030378" "49037600" "49039542" "49048892" "49054224" "49080258"
[505] "49080826" "49083578" "49090746" "49099006" "49106772" "49109718" "49125652" "49134600" "49138096"
[514] "49149038" "49155600" "49163732" "49165436" "49170722" "49172970" "49180720" "49184378" "49184742"
[523] "49201282" "49212976" "49222778" "49232916" "49240994" "49248720" "49251494" "49252638" "49254978"
[532] "49260744" "49261278" "49261432" "49261568" "49262246" "49265394" "49286224" "49302098" "49307066"
[541] "49316868" "49318660" "49323442" "49347988" "49354350" "49356244" "49358672" "49362350" "49364332"
[550] "49371618" "49372000" "49377894" "49383836" "49390614" "49399864" "49402118" "49429070" "49447114"
[559] "49447372" "49447824" "49449006" "49449776" "49450634" "49462794" "49466620" "49482489" "49488915"
[568] "49502562" "49502751" "49504395" "49538025" "49547133" "49548504" "49549122" "49549245" "49558167"
[577] "49559019" "49562073" "49566900" "49567890" "49593603" "49609487" "49612283" "49623447" "49630395"
[586] "49682007" "49683235" "49693067" "49703271" "49735859" "49787507" "49804788" "49826304" "49854316"
[595] "49871428" "49882084" "49892244" "49906312" "50006500" "50010188" "50012148" "50017220" "50102336"
[604] "50145460" "50149908" "50197688" "50235080" "50270772" "50324076" "50332084" "50395592" "50419520"
[613] "50468964" "50472696" "50501368" "50503456" "50519528" "50523384" "50527796" "50629412" "50632356"
[622] "50651016" "50672712" "50741516" "50750348" "50759432" "50776368" "50784140" "50789312" "50805760"
[631] "50864580" "50903252" "51052556" "51069788" "51093676" "51097200" "51121404" "51126324" "51165576"
[640] "51262336" "51266864" "51399072" "51409968" "51416408" "51443272" "51443648" "51490916" "51550260"
[649] "51551876" "51553048" "51556236" "51556420" "51563200" "51576692" "51577636" "51619244" "51671364"
[658] "51675648" "51716744" "51717600" "51732300" "51733892" "51762380" "51770952" "51791092" "51810520"
[667] "51820884" "51823628" "51898724" "51900188" "51938848" "51939640" "51958040" "51966736" "51971032"
[676] "51979696" "51990976" "52070539" "52085811" "52090751" "52092335" "52096287" "52109147" "52112551"
[685] "52180595" "52205635" "52205899" "52206759" "52207075" "52235327" "52286179" "52296223" "52303247"
[694] "52326875" "52327491" "52327531" "52356191" "52365151" "52368135" "52450331" "52455083" "52457179"
[703] "52461235" "52464187" "52464699" "52464955" "52478483" "52482511" "52490611" "52494415" "52534163"
[712] "52538155" "52538959" "52564483" "52578151" "52635379" "52637047" "52651043" "52661799" "52674243"
[721] "52694767" "52695523" "52710115" "52722719" "52734119" "52741619" "52749403" "52769127" "52772103"
[730] "52774091" "52777755" "52781227" "52781367" "52785295" "52796883" "52814743" "52849863" "52874191"
[739] "52875835" "52877551" "52880915" "52908727" "52918143" "52944071" "52966283" "52968011" "52968975"
[748] "52971043" "52996771" "53022971" "53031095" "53048015" "53061039" "53084835" "53091027" "53093971"
[757] "53123683" "53128255" "53138643" "53157543" "53180700" "53185928" "53202504" "53213920" "53215180"
[766] "53252704" "53266440" "53283072" "53291116" "53291632" "53305308" "53336684" "53337116" "53338856"
[775] "53363316" "53372440" "53393812" "53396360" "53418156" "53426344" "53437820" "53438592" "53442568"
[784] "53458828" "53478744" "53481412" "53493260" "53508304" "53514344" "53516060" "53521636" "53539488"
[793] "53558164" "53560576" "53574808" "53575288" "53575312" "53584916" "53590244" "53630656" "53631048"
[802] "53638480" "53646644" "53647056" "53647572" "53659388" "53660964" "53670064" "53673664" "53688636"
[811] "53697416" "53703620" "53723848" "53728888" "53731220" "53731560" "53737484" "53740040" "53758400"
[820] "53780328" "53780468" "53786604" "53797796" "53808896" "53809160" "53814860" "53855840" "53858052"
[829] "53880320" "53895620" "53908000" "53909328" "53922208" "53940944" "53943212" "53978028" "53986068"
[838] "54002428" "54009940" "54014872" "54015744" "54017756" "54039080" "54041908" "54062548" "54072664"
[847] "54083476" "54106788" "54108064" "54109904" "54120364" "54150216" "54155800" "54157356" "54165336"
[856] "54171540" "54171640" "54177956" "54180384" "54182668" "54195744" "54235660" "54236312" "54255996"
[865] "54274584" "54275632" "54281512" "54299288" "54320140" "54322780" "54329904" "54331776" "54346460"
[874] "54373864" "54374648" "54376316" "54377288" "54377896" "54378004" "54381012" "54381240" "54381324"
[883] "54394052" "54400708" "54429680" "54430100" "54447744" "54449328" "54462972" "54469796" "54470868"
[892] "54487764" "54490860" "54491976" "54509104" "54511800" "54514144" "54528924" "54536760" "54563032"
[901] "54579352" "54585380" "54586468" "54603824" "54604100" "54612744" "54616412" "54620656" "54621772"
[910] "54628572" "54629880" "54636564" "54645420" "54675648" "54682268" "54682772" "54689796" "54702460"
[919] "54704528" "54712940" "54724936" "54736856" "54745148" "54749916" "54753208" "54754368" "54760156"
[928] "54763236" "54769924" "54781596" "54782672" "54789096" "54800512" "54814808" "54821436" "54836036"
[937] "54838764" "54841916" "54842180" "54852440" "54866264" "54866944" "54875012" "54875584" "54881960"
[946] "54889540" "54900356" "54901376" "54917772" "54918848" "54924404" "54924672" "54938324" "54942804"
[955] "54980380" "54985312" "54987340" "54995580" "55000056" "55002292" "55036640" "55037324" "55039208"
[964] "55052168" "55055984" "55057276" "55063540" "55063660" "55075132" "55078748" "55080684" "55081040"
[973] "55082500" "55091420" "55114576" "55115372" "55121108" "55130888" "55137192" "55143740" "55149876"
[982] "55150356" "55151004" "55153896" "55156080" "55165096" "55168456" "55192880" "55210024" "55216480"
[991] "55216936" "55219464" "55224420" "55224848" "55227228" "55227428" "55227680" "55227744" "55235764"
[1000] "55239384"
[ reached getOption("max.print") -- omitted 15448 entries ]
# Column selection with `[`: rows left empty, columns given by position or name.
rent[, 1, drop = FALSE] # drop = FALSE asks for a data frame instead of a vector
rent[, 1:10] # columns 1 through 10
rent[, ncol(rent), drop = FALSE] # last column
rent[, c(1, 5, 10)] # columns 1, 5 and 10 by position
rent[, c("id", "price", "flat_area")] # columns by name
rent$price ## vector
[1] 1400 4600 2300 1600 1200 1800 1500 1400 1300 1200 2000 2000 1600 1400 1990 1290
[17] 1200 1800 1300 1500 1800 1500 3400 1400 1149 1400 1100 1000 1600 1600 1650 1200
[33] 1300 1300 3200 1980 1550 1600 1200 1100 1300 1400 1700 1850 1900 1900 2100 1300
[49] 1250 2400 1650 1500 2300 1670 1300 1200 1650 1100 1100 1700 1300 1450 2400 1100
[65] 1350 1600 2099 1300 2200 1300 2000 1300 1500 1550 2500 1600 1800 1300 1450 970
[81] 1700 1700 1600 2500 1600 2350 1500 1650 3500 2300 3000 3000 1500 1590 2250 1600
[97] 1600 2000 1250 1600 1400 1350 1500 1490 2500 2800 2200 1400 2600 1390 1900 1000
[113] 1200 2400 1700 3500 2500 2950 1100 1800 1500 1400 1500 1400 1450 1200 1400 1490
[129] 1690 1500 1300 2000 1300 1800 1300 1999 2200 1950 2400 1600 1800 1800 1350 2800
[145] 2200 1000 1100 1700 2200 1390 1450 1199 1400 1650 1300 1200 1300 1150 1800 2500
[161] 1600 2000 1800 2000 1900 1320 1950 1650 1100 1750 1200 1200 1800 1300 1500 2200
[177] 1400 1100 1150 1450 2300 1350 1600 1250 1050 1200 1000 1390 1350 1350 2500 1700
[193] 1600 1430 1950 1700 1300 2200 1150 2590 1500 950 1700 1350 1130 1250 1400 1500
[209] 1600 1750 1450 1600 1500 1290 1250 1800 1790 1200 1600 1650 2000 2800 1700 1500
[225] 1450 3550 1800 1500 1900 1700 1100 1450 1900 1000 1300 1350 2300 1400 1600 1800
[241] 1600 1600 1400 550 1600 1500 1499 3200 1200 1750 1750 1300 2990 700 1700 1500
[257] 1350 2650 1600 1990 1099 1300 1700 1600 1800 1330 1890 1700 1449 1200 1650 1550
[273] 1500 1900 2200 1700 2200 290 1550 2000 800 2300 1350 1500 1500 1000 1350 1700
[289] 1750 1520 1500 1400 1750 1500 1400 1800 2300 2200 1000 1000 1400 1600 2000 1799
[305] 1150 950 3300 3000 2400 1000 2800 1800 2500 1550 1700 1299 2300 1800 1600 1750
[321] 1350 2300 1600 1300 1200 1250 2400 1300 1950 2300 1600 1950 1500 1500 1600 1650
[337] 1000 1600 1850 1600 2100 1650 2000 2000 970 1750 2600 1300 1600 1200 1300 7500
[353] 1400 1250 1400 1750 1050 2900 1700 1380 1800 1350 2300 1350 1250 1350 1400 1250
[369] 1800 2000 1900 1850 2800 1250 1950 1600 1300 1390 1650 1800 1300 1700 1600 1500
[385] 2000 1450 1300 1450 1450 1550 1800 1600 1300 1100 1500 900 1600 1300 1200 1700
[401] 1200 1700 1480 1800 1500 1990 1950 1100 1800 1200 900 1200 1390 1600 1200 3900
[417] 1650 3550 1200 1250 1700 3800 1580 2000 1600 1550 1100 1500 1950 1700 1450 1600
[433] 2100 1950 2200 1600 1700 1500 1200 1300 1700 1200 1800 1950 1600 2250 1699 1900
[449] 1500 2250 1700 1300 2000 1600 3500 2100 1100 2300 1500 1450 2300 1500 9900 2200
[465] 1670 2400 1199 1900 1390 1200 1500 1800 1700 1700 1300 1500 2000 1400 1250 1550
[481] 2990 2500 1500 1400 2200 2000 1800 1500 2000 2000 2800 1800 1300 2100 2100 1350
[497] 1700 1600 2000 1580 2100 3500 2500 1300 1200 1600 1200 1200 1600 2500 1300 1290
[513] 1600 2000 2100 980 1350 1200 4200 1600 1500 2200 2150 1600 2300 1100 3000 1600
[529] 1600 1400 2600 1700 1400 1600 1900 2100 1600 1300 1700 1400 1800 1100 1900 1650
[545] 2200 1350 1600 1400 1300 1500 1100 1700 2300 1600 1500 1900 2400 1800 2100 1200
[561] 1250 2000 1600 1700 2100 1199 1100 1250 1200 1400 2000 1650 1900 2200 2800 2100
[577] 1500 1690 1650 2700 1500 1500 1540 1850 1500 750 1250 1300 1400 1300 1650 2200
[593] 3900 1200 1650 1500 1400 1400 1400 1950 1500 1450 2000 1000 1600 1250 2100 2450
[609] 1400 1650 2600 1700 1850 1950 1900 2250 1200 1800 1200 13900 1700 2100 1300 1300
[625] 1700 900 2000 1300 1400 2300 1800 1900 2500 2300 2700 2250 1450 1750 1200 1300
[641] 1500 1800 1590 1400 2000 2400 1850 2400 1800 2000 1750 2750 6500 1500 1200 1700
[657] 1450 2300 1200 2200 1650 2150 1600 1420 2200 1950 1100 1800 1200 1600 3200 3300
[673] 1150 3500 2500 1600 1200 1750 1700 1600 2500 2200 1200 1599 2500 1390 1700 1400
[689] 1600 2100 850 2200 2400 1000 1800 1850 1700 2300 1650 1650 1400 1900 2150 1350
[705] 1650 1700 1500 1650 2100 1290 2300 1400 1390 1300 1300 1100 1400 1450 2000 1500
[721] 1000 1800 1300 1590 2000 2500 2000 1000 1150 1850 1500 2800 3000 900 2200 1500
[737] 1400 1000 1800 1800 990 1650 1500 1300 2400 1800 2500 1900 1600 3000 1600 1200
[753] 2500 1200 2600 5000 1500 1100 1100 3900 1250 1950 1600 1650 1150 1600 1600 2300
[769] 1700 2500 1200 1100 2500 1800 1700 1300 1250 1450 2300 1250 1100 1400 1550 1500
[785] 1200 1500 1900 1700 1650 1700 2700 1800 1150 1520 2090 1700 2350 1800 2200 1600
[801] 1850 1400 2300 1650 2200 1350 1500 3800 1550 1900 1800 2700 2100 1300 1450 1700
[817] 2000 2000 2100 2600 1600 2100 1300 1300 1900 1650 1200 1750 1900 1800 1500 2000
[833] 1200 1349 1800 2400 1650 1800 1700 1200 1250 2200 1600 1600 1780 2100 1300 1550
[849] 2940 1700 2090 1750 1850 1500 1950 1600 1800 1000 1450 3100 1200 1850 2600 1750
[865] 1700 1600 1250 1800 1900 1300 2400 2200 1500 1700 1700 2000 1000 1600 2000 1199
[881] 2200 2000 1900 1400 4400 1200 1250 2700 1650 3200 1500 1750 1200 1900 1600 1300
[897] 1800 1500 1400 2200 1350 2450 1350 1600 2000 1500 1250 2300 1400 1490 2500 1600
[913] 2500 1300 2100 2700 1990 1500 1100 1600 1600 2400 1150 2000 2200 1250 1100 1350
[929] 1500 1050 2100 1600 1900 1300 3000 2000 3500 1150 1600 1200 1400 1300 3000 1400
[945] 2000 2200 1999 1400 1700 1500 1750 1350 1900 1600 1190 1600 2000 1400 1900 1500
[961] 1950 2050 800 1250 1300 1300 2500 2700 1400 1500 1300 1700 1200 2900 1000 1300
[977] 1600 1400 1400 1500 1050 2700 2200 1050 4200 1500 1500 1400 1750 1200 2200 1300
[993] 2700 2800 1500 1800 2200 1200 1800 1800
[ reached getOption("max.print") -- omitted 15448 entries ]
# Summary statistics (min, quartiles, median, mean, max) of the price column.
summary(rent$price)
Min. 1st Qu. Median Mean 3rd Qu. Max.
250 1350 1600 1763 2000 45000
Wybór kolumn z funkcją subset
# Column selection with subset(): `select` accepts positions, names, or a
# range of unquoted column names.
subset(rent, select = 1) # first column
subset(rent, select = 1:3) # columns 1 through 3
subset(rent, select = c(1,5, 10)) # columns 1, 5 and 10
subset(rent, select = c("id", "price", "flat_area")) # columns by name
subset(rent, select = id:flat_area) # every column from id through flat_area
Wybór wierszy
# Row selection with `[`: row index first, columns left empty.
rent[1,] # first row
rent[1:3,] # rows 1 through 3
rent[c(1, 5, 10), ] # rows 1, 5 and 10
rent[nrow(rent), ] ## last row; same as tail(rent, 1)
mtcars["Mazda RX4",] # here rows are selected by row name
Wybieram mieszkania, których cena była niższa lub równa 300 zł
# Listings priced at 300 or less.
rent[
  rent[["price"]] <= 300,
  c("id", "price", "ad_title", "flat_area")
]
# Listings priced above 15000.
rent[
  rent[["price"]] > 15000,
  c("id", "price", "ad_title", "flat_area")
]
# Listings priced strictly between 300 and 500.
rent[
  rent[["price"]] > 300 & rent[["price"]] < 500,
  c("id", "price", "ad_title", "flat_area")
]
Wybieramy wiersze i kolumny z funkcją subset
# Row and column selection in a single subset() call. Column names in the
# condition are looked up inside `rent`, so no rent$ prefix is needed.
# Variant 1: arguments passed by name.
subset(
  rent,
  subset = price > 300 & price < 500,
  select = c("id", "price", "ad_title", "flat_area")
)
# Variant 2: the same call relying on argument position (x, subset, select).
subset(
  rent,
  price > 300 & price < 500,
  c("id", "price", "ad_title", "flat_area")
)
Zadania
# Exercises: filtering listings with subset().

# Listings flagged flat_for_students with price below 1000, written three
# equivalent ways: compared with 1, compared with TRUE, and using the logical
# column directly as the condition. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
subset(rent, flat_for_students == 1 & price < 1000)
subset(rent, flat_for_students == TRUE & price < 1000)
subset(rent, flat_for_students & price < 1000)

# Listings with flat_area of at most 50.
subset(rent, flat_area <= 50)
# Same filter restricted to one quarter. The first attempt is wrong on
# purpose: string comparison is case-sensitive and "centrum" is lowercase.
subset(rent, flat_area <= 50 & quarter == "centrum") ## this is wrong (lowercase letter)
subset(rent, flat_area <= 50 & quarter == "Centrum")

# Two quarters at once: explicit OR versus the shorter %in% form.
subset(rent, (quarter == "Centrum" | quarter == "Rataje") & flat_area <= 50)
subset(rent, quarter %in% c("Centrum", "Rataje") & flat_area <= 50)

# Listings whose deposit equals the price; only the two compared columns.
subset(rent, flat_deposit == price, c("price", "flat_deposit"))
# Listings where half of the deposit exceeds three times the price.
subset(rent, (flat_deposit / 2) > 3 * price, c("price", "flat_deposit"))

# Listings whose title contains "ataner", ignoring letter case.
subset(
  rent,
  grepl("ataner", ad_title, ignore.case = TRUE),
  c("ad_title", "price")
)
Tworzymy nowe kolumny
# Create the price-per-unit-area column before printing it: the column was
# displayed without ever being defined. The values shown below correspond to
# price divided by flat_area.
rent$cena_m2 <- rent$price / rent$flat_area
rent$cena_m2
[1] 46.666667 28.750000 42.592593 32.653061 36.923077 35.294118 31.914894 50.000000 43.333333
[10] 42.857143 31.746032 41.666667 33.333333 40.000000 27.638889 33.947368 30.000000 36.734694
[19] 34.210526 23.076923 39.130435 48.387097 44.736842 56.000000 37.672131 31.111111 27.500000
[28] 33.333333 32.000000 51.612903 42.091837 34.285714 36.111111 41.935484 37.209302 46.046512
[37] 26.956522 27.586207 35.294118 32.352941 48.148148 44.728435 31.481481 52.857143 54.285714
[46] 38.000000 38.888889 41.935484 39.062500 33.333333 31.730769 30.000000 35.384615 33.400000
[55] 39.393939 30.000000 27.500000 28.947368 39.285714 35.416667 43.333333 36.250000 33.802817
[64] 27.989822 35.526316 53.333333 29.563380 41.269841 41.509434 50.000000 30.534351 29.545455
[73] 37.500000 32.978723 100.000000 32.323232 42.857143 34.210526 46.774194 24.556962 30.357143
[82] 39.534884 20.779221 58.139535 30.769231 37.301587 35.714286 28.448276 35.000000 42.592593
[91] 27.272727 26.086957 39.473684 41.842105 37.500000 30.188679 36.199095 55.555556 29.761905
[100] 32.000000 43.750000 38.571429 46.875000 49.666667 49.019608 37.333333 44.897959 28.571429
[109] 43.333333 28.958333 38.000000 45.454545 40.000000 42.105263 24.285714 50.000000 37.878788
[118] 53.636364 55.000000 40.000000 34.883721 29.166667 37.500000 46.666667 45.312500 44.444444
[127] 31.111111 36.341463 49.705882 39.473684 39.393939 38.834951 50.000000 26.865672 27.083333
[136] 36.345455 50.000000 31.967213 46.153846 33.333333 33.962264 36.000000 27.439024 51.851852
[145] 45.833333 33.333333 35.483871 40.476190 45.833333 46.333333 53.703704 35.264706 33.333333
[154] 50.000000 50.000000 30.000000 38.235294 41.071429 41.860465 41.666667 30.188679 66.666667
[163] 34.615385 30.769231 37.549407 45.517241 32.500000 61.111111 34.375000 31.818182 37.500000
[172] 27.906977 34.615385 38.235294 30.303030 48.888889 29.787234 55.000000 38.333333 38.157895
[181] 46.000000 58.695652 41.025641 39.062500 42.000000 42.857143 47.619048 30.217391 39.705882
[190] 45.000000 45.454545 48.571429 40.000000 47.666667 36.792453 30.909091 25.000000 36.666667
[199] 48.936170 47.962963 28.846154 39.583333 45.945946 46.551724 37.666667 32.051282 31.111111
[208] 30.000000 31.372549 37.473233 29.000000 28.571429 31.578947 40.312500 44.642857 30.981067
[217] 49.722222 48.000000 25.000000 31.730769 41.666667 34.146341 37.777778 48.387097 29.000000
[226] 47.333333 42.857143 30.612245 44.186047 40.476190 25.171625 23.015873 32.758621 32.258065
[235] 46.428571 39.705882 41.818182 23.333333 33.333333 38.297872 32.653061 31.683168 50.000000
[244] 8.593750 29.629630 30.612245 29.980000 29.090909 39.603960 35.282258 43.750000 34.210526
[253] 46.000000 28.000000 36.956522 27.272727 27.000000 37.323944 50.000000 62.756228 39.250000
[262] 40.625000 42.500000 34.042553 36.734694 34.102564 33.750000 31.775701 39.364303 23.529412
[271] 42.307692 29.807692 29.815146 44.186047 31.884058 32.075472 31.428571 4.084507 51.666667
[280] 43.383948 12.307692 34.328358 28.125000 33.333333 34.090909 26.315789 50.000000 36.956522
[289] 43.750000 28.148148 28.301887 37.363224 43.103448 34.883721 31.818182 36.000000 35.384615
[298] 43.002346 50.000000 58.823529 27.450980 27.586207 35.701535 32.709091 46.000000 41.304348
[307] 52.380952 40.540541 43.636364 43.478261 45.095829 60.000000 44.642857 50.000000 35.416667
[316] 40.593750 33.333333 33.962264 26.666667 27.343750 48.214286 42.592593 35.555556 59.090909
[325] 40.000000 25.000000 33.333333 27.083333 37.500000 21.575985 27.586207 33.050847 37.500000
[334] 38.461538 34.042553 33.000000 40.000000 25.806452 57.812500 37.209302 39.622642 39.285714
[343] 57.142857 58.823529 60.625000 48.611111 41.269841 52.000000 23.529412 36.363636 41.935484
[352] 47.468354 28.571429 25.000000 29.166667 36.458333 37.500000 44.615385 34.000000 39.428571
[361] 39.130435 45.000000 42.279412 40.909091 39.556962 30.000000 42.424242 31.250000 38.297872
[370] 36.363636 42.222222 43.023256 41.176471 41.666667 44.318182 50.000000 25.490196 39.714286
[379] 33.673469 30.000000 33.333333 39.534884 40.000000 49.180328 30.769231 47.077922 36.111111
[388] 27.102804 27.358491 30.632411 24.000000 53.333333 38.235294 40.740741 42.857143 45.000000
[397] 34.042553 28.888889 40.000000 32.692308 37.500000 32.692308 30.833333 45.000000 42.372881
[406] 25.512821 39.000000 27.500000 32.727273 37.500000 28.125000 24.000000 28.367347 26.666667
[415] 34.285714 34.821429 25.384615 42.771084 46.153846 39.062500 36.956522 44.186047 49.375000
[424] 36.363636 42.105263 31.000000 44.000000 39.473684 43.333333 39.534884 34.523810 43.243243
[433] 50.000000 28.260870 44.000000 35.555556 37.777778 30.612245 40.000000 43.333333 34.000000
[442] 52.173913 25.714286 42.391304 35.802193 45.000000 42.475000 29.230769 31.914894 39.473684
[451] 38.742024 39.658328 42.553191 40.000000 25.000000 30.000000 37.931034 28.750000 30.000000
[460] 26.851852 30.263158 46.875000 41.422594 34.375000 26.507937 35.820896 36.333333 39.583333
[469] 28.367347 34.285714 40.540541 33.962264 30.357143 18.888889 26.000000 46.439628 33.333333
[478] 43.343653 40.322581 33.695652 21.357143 35.714286 31.380753 33.333333 27.500000 31.250000
[487] 37.500000 25.000000 37.735849 17.857143 20.895522 47.368421 32.500000 32.307692 53.846154
[496] 20.454545 31.481481 37.209302 60.606061 23.582090 42.168675 50.000000 25.000000 32.500000
[505] 38.709677 26.666667 34.285714 50.000000 43.243243 42.372881 27.083333 67.894737 46.920821
[514] 46.511628 61.764706 39.200000 54.000000 36.429872 53.164557 28.571429 30.731407 40.740741
[523] 30.714286 45.714286 62.162162 45.833333 28.846154 32.653061 32.000000 34.610630 31.707317
[532] 31.481481 40.000000 33.333333 38.000000 42.857143 35.555556 36.111111 34.693878 36.745407
[541] 33.333333 29.729730 38.000000 43.524136 47.826087 27.000000 34.042553 51.851852 34.210526
[550] 39.473684 46.808511 33.333333 56.097561 40.000000 30.612245 36.538462 34.042553 51.428571
[559] 36.206897 25.000000 25.510204 41.666667 28.571429 51.515152 38.888889 32.405405 26.829268
[568] 28.409091 31.578947 40.579710 39.215686 41.250000 33.928571 37.931034 36.842105 46.666667
[577] 39.164491 28.166667 97.058824 42.187500 40.540541 31.250000 29.615385 37.755102 30.000000
[586] 41.666667 26.595745 43.333333 27.450980 34.210526 39.285714 34.108527 33.050847 30.769231
[595] 31.132075 23.437500 28.000000 73.684211 73.684211 36.792453 34.090909 29.896907 50.000000
[604] 27.397260 84.210526 33.783784 52.500000 40.833333 82.352941 41.250000 40.061633 35.416667
[613] 56.060606 54.166667 33.928571 42.452830 27.272727 35.294118 38.338658 110.317460 44.736842
[622] 42.000000 26.000000 30.952381 31.481481 36.000000 41.666667 37.142857 28.000000 30.666667
[631] 36.000000 43.280182 37.650602 36.507937 36.486486 53.571429 45.312500 33.653846 31.496063
[640] 40.752351 28.301887 36.734694 79.500000 41.176471 28.169014 33.333333 44.152745 64.690027
[649] 40.000000 25.000000 43.750000 42.968750 76.470588 51.724138 36.363636 25.373134 29.000000
[658] 46.000000 54.545455 55.000000 29.464286 43.000000 37.209302 33.023256 27.500000 43.820225
[667] 34.375000 39.130435 34.285714 33.333333 43.243243 50.769231 34.534535 45.454545 22.727273
[676] 31.809145 30.769231 34.313725 38.636364 59.259259 31.645570 42.307692 44.444444 44.416667
[685] 50.000000 30.888889 36.956522 30.434783 32.653061 40.384615 32.692308 34.209299 37.500000
[694] 38.461538 51.428571 48.684211 41.463415 47.916667 47.142857 55.000000 46.666667 35.185185
[703] 28.104575 45.000000 34.375000 31.481481 60.000000 45.205479 37.906137 42.574257 28.048780
[712] 26.415094 46.333333 40.625000 41.680026 36.666667 29.166667 43.939394 44.444444 30.000000
[721] 23.809524 63.157895 42.345277 24.461538 37.735849 50.000000 37.037037 28.571429 42.592593
[730] 25.694444 46.875000 29.473684 31.914894 28.125000 55.000000 39.473684 46.666667 35.714286
[739] 46.153846 37.894737 36.666667 32.352941 34.883721 29.545455 29.268293 35.294118 28.735632
[748] 38.000000 36.363636 58.823529 48.484848 34.285714 35.714286 42.857143 37.790698 41.666667
[757] 27.777778 34.375000 32.352941 62.903226 43.103448 44.318182 84.210526 35.106383 38.333333
[766] 50.000000 39.024390 31.807496 32.075472 27.777778 27.906977 32.352941 38.461538 40.000000
[775] 34.000000 40.625000 40.115533 32.222222 41.818182 30.295686 33.333333 32.941176 55.357143
[784] 37.500000 47.430830 51.724138 42.222222 56.666667 55.000000 38.636364 39.705882 29.032258
[793] 28.500620 31.666667 41.800000 42.500000 50.000000 27.692308 54.320988 37.209302 47.435897
[802] 56.000000 33.155543 55.000000 34.920635 43.062201 30.000000 82.608696 32.631579 20.879121
[811] 27.692308 35.526316 50.000000 43.333333 55.769231 36.956522 50.000000 50.000000 42.000000
[820] 40.000000 32.653061 58.333333 43.333333 50.000000 38.000000 30.555556 24.489796 51.470588
[829] 44.906641 32.727273 42.857143 28.078057 22.641509 42.156250 45.000000 40.677966 39.285714
[838] 38.297872 41.463415 20.000000 52.083333 29.729730 37.209302 40.000000 48.108108 48.837209
[847] 26.530612 34.444444 40.833333 29.824561 41.800000 56.451613 37.000000 34.090909 49.630949
[856] 33.826638 34.615385 37.593985 31.521739 57.407407 46.153846 38.541667 43.333333 36.104807
[865] 34.000000 37.209302 41.666667 47.368421 37.379500 43.333333 42.105263 36.666667 48.231511
[874] 23.129252 34.000000 30.769231 26.315789 34.042553 20.000000 46.115385 40.740741 38.461538
[883] 45.238095 28.688525 48.888889 27.906977 35.714286 22.632020 55.000000 51.612903 33.333333
[892] 43.750000 37.383178 41.304348 32.000000 26.530612 41.570439 31.120332 25.454545 32.835821
[901] 53.149606 52.127660 32.926829 24.242424 43.478261 31.914894 46.746447 56.097561 45.161290
[910] 36.341463 34.246575 35.555556 44.642857 28.138528 39.622642 36.000000 52.176193 40.540541
[919] 34.375000 29.090909 32.000000 43.636364 36.507937 38.461538 37.288136 39.062500 28.947368
[928] 36.885246 24.590164 45.652174 46.666667 33.333333 44.186047 40.625000 44.776119 44.444444
[937] 60.344828 32.857143 35.555556 36.363636 45.161290 28.260870 36.144578 42.424242 31.250000
[946] 37.697053 37.018519 31.111111 38.759690 35.714286 43.750000 34.722222 52.777778 36.363636
[955] 62.631579 30.188679 45.454545 31.111111 39.583333 20.270270 30.468750 41.836735 48.192771
[964] 32.894737 41.935484 34.210526 48.076923 49.090909 46.666667 35.714286 37.142857 41.463415
[973] 27.272727 41.428571 26.315789 37.142857 26.229508 28.000000 48.275862 31.250000 40.384615
[982] 23.478261 19.515657 37.500000 39.622642 21.428571 31.914894 26.923077 35.000000 36.363636
[991] 40.000000 36.111111 54.000000 44.800000 68.181818 66.666667 39.285714 40.000000 23.076923
[1000] 37.500000
[ reached getOption("max.print") -- omitted 15448 entries ]
# Logical flag: TRUE where the listing's quarter is "Centrum".
rent[["centrum"]] <- rent[["quarter"]] == "Centrum"
# Logical flag for the "Rataje" quarter, added with transform().
rent <- transform(rent, rataje = (quarter == "Rataje"))
# Preview the data frame with the two new columns.
head(rent)
Ćwiczenie:
Utworzyć kolumny z funkcją transform
price_log – logarytm naturalny ceny; area_log – logarytm naturalny powierzchni; kawalerka – jeżeli w tytule ogłoszenia jest słowo „kawalerka”
# Add three derived columns in one transform() call:
#   price_log - natural logarithm of price
#   area_log  - natural logarithm of flat_area
#   kawalerka - TRUE when ad_title contains "kawalerka" (case-insensitive)
rent <- transform(
  rent,
  price_log = log(price),
  area_log = log(flat_area),
  kawalerka = grepl(pattern = "kawalerka", x = ad_title, ignore.case = TRUE)
)
Tworzymy nową kolumnę z funkcją ifelse
# Label listings with a price below 500 or above 15000 as "tanie-drogie",
# all others as "inne".
rent$price_tanie_drogie <- ifelse(
  rent$price < 500 | rent$price > 15000,
  "tanie-drogie",
  "inne"
)
# Show the first 50 rows of the price next to its new label.
head(rent[, c("price", "price_tanie_drogie")], n = 50)
Tworzymy przedziały dla powierzchni z funkcją cut
# Bin flat_area into intervals before tabulating: the flat_area_cut column was
# tabulated without ever being defined. Breaks and labels follow the
# categories shown in the printed table; intervals are right-closed (cut()'s
# default), and the last one is open-ended.
rent$flat_area_cut <- cut(
  rent$flat_area,
  breaks = c(0, 10, 20, 30, Inf),
  labels = c("(0, 10]", "(10, 20]", "(20, 30]", "30+")
)
table(rent$flat_area_cut)
(0, 10] (10, 20] (20, 30] 30+
84 524 1938 13902
Agregujemy dane
# Frequency table: number of listings for each value of flat_rooms.
table(rent$flat_rooms)
1 2 3 4 5 6 7 8 9 10 11
4438 8283 3118 500 63 21 9 2 5 1 8
# The same distribution expressed as percentages of all listings.
prop.table(table(rent$flat_rooms))*100
1 2 3 4 5 6 7 8
26.982003891 50.358706226 18.956712062 3.039883268 0.383025292 0.127675097 0.054717899 0.012159533
9 10 11
0.030398833 0.006079767 0.048638132
# Bar chart of the flat_rooms frequency table.
barplot(table(rent$flat_rooms))
Tablica krzyżowa z funkcją table i xtabs
# Two-way frequency table: flat_rooms in rows, flat_for_students in columns.
table(rent$flat_rooms, rent$flat_for_students)
FALSE TRUE
1 3154 1284
2 6190 2093
3 2303 815
4 412 88
5 54 9
6 18 3
7 9 0
8 1 1
9 3 2
10 1 0
11 7 1
# Cross-tabulation of room count by student-flat flag, stored for reuse.
# Naming the arguments keeps the dimension labels flat_rooms and
# flat_for_students.
tab1 <- table(
  flat_rooms = rent$flat_rooms,
  flat_for_students = rent$flat_for_students
)
# Cell percentages relative to the grand total.
100 * prop.table(tab1)
flat_for_students
flat_rooms FALSE TRUE
1 19.175583658 7.806420233
2 37.633754864 12.724951362
3 14.001702335 4.955009728
4 2.504863813 0.535019455
5 0.328307393 0.054717899
6 0.109435798 0.018239300
7 0.054717899 0.000000000
8 0.006079767 0.006079767
9 0.018239300 0.012159533
10 0.006079767 0.000000000
11 0.042558366 0.006079767
# Column percentages: with margin = 2 each flat_for_students column sums to 100.
prop.table(tab1, margin = 2)*100
flat_for_students
flat_rooms FALSE TRUE
1 25.954575379 29.888268156
2 50.938117182 48.719739292
3 18.951612903 18.971135940
4 3.390388413 2.048417132
5 0.444371297 0.209497207
6 0.148123766 0.069832402
7 0.074061883 0.000000000
8 0.008229098 0.023277467
9 0.024687294 0.046554935
10 0.008229098 0.000000000
11 0.057603687 0.023277467
# The same cross-tabulation written with the formula interface of xtabs().
xtabs(formula = ~ flat_rooms + flat_for_students, data = rent)
flat_for_students
flat_rooms FALSE TRUE
1 3154 1284
2 6190 2093
3 2303 815
4 412 88
5 54 9
6 18 3
7 9 0
8 1 1
9 3 2
10 1 0
11 7 1
Funkcja summary do podsumowania zbioru lub zmiennej
# Column-by-column summary of the whole data frame.
summary(object = rent)
id date_activ date_modif
Length:16448 Min. :2013-07-19 00:00:00.00 Min. :2019-05-02 00:00:00.00
Class :character 1st Qu.:2019-07-28 00:00:00.00 1st Qu.:2019-09-30 00:00:00.00
Mode :character Median :2020-01-14 00:00:00.00 Median :2020-03-27 00:00:00.00
Mean :2019-11-13 09:00:15.75 Mean :2020-02-29 06:17:09.56
3rd Qu.:2020-06-09 00:00:00.00 3rd Qu.:2020-07-28 00:00:00.00
Max. :2020-09-25 00:00:00.00 Max. :2020-09-26 00:00:00.00
date_expire individual price flat_area flat_rooms
Min. :2019-06-01 00:00:00.00 Mode :logical Min. : 250 Min. : 4.20 Min. : 1.000
1st Qu.:2019-10-25 00:00:00.00 FALSE:11536 1st Qu.: 1350 1st Qu.: 35.00 1st Qu.: 1.000
Median :2020-04-21 00:00:00.00 TRUE :4912 Median : 1600 Median : 45.99 Median : 2.000
Mean :2020-03-24 17:36:05.95 Mean : 1763 Mean : 47.75 Mean : 2.008
3rd Qu.:2020-08-21 00:00:00.00 3rd Qu.: 2000 3rd Qu.: 55.00 3rd Qu.: 2.000
Max. :2021-04-04 00:00:00.00 Max. :45000 Max. :2200.00 Max. :11.000
flat_floor_no flat_build_year flat_furnished flat_rent flat_deposit
Min. : 0.00 Min. : 0 Mode :logical Min. : 0.0 Min. : 0
1st Qu.: 2.00 1st Qu.: 0 FALSE:8297 1st Qu.: 0.0 1st Qu.: 0
Median : 3.00 Median : 1965 TRUE :8151 Median : 300.0 Median : 1400
Mean : 4.18 Mean : 1175 Mean : 286.4 Mean : 1289
3rd Qu.: 5.00 3rd Qu.: 2010 3rd Qu.: 455.0 3rd Qu.: 2000
Max. :54.00 Max. :124901 Max. :50000.0 Max. :600067
NA's :673
flat_price_include_rent flat_for_students flat_heating flat_status flat_windows
Mode :logical Mode :logical Min. :0.000 Min. :0.000 Min. :0.000
FALSE:16448 FALSE:12152 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
TRUE :4296 Median :0.000 Median :0.000 Median :0.000
Mean :0.282 Mean :0.006 Mean :0.101
3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
Max. :5.000 Max. :2.000 Max. :2.000
NA's :4148 NA's :5086 NA's :4768
building_floor_num building_type building_material ad_title ad_seller_id
Min. : 0.00 Min. :0.000 Min. :0.000 Length:16448 Length:16448
1st Qu.: 3.00 1st Qu.:0.000 1st Qu.:0.000 Class :character Class :character
Median : 4.00 Median :0.000 Median :0.000 Mode :character Mode :character
Mean : 4.29 Mean :1.518 Mean :1.182
3rd Qu.: 5.00 3rd Qu.:4.000 3rd Qu.:0.000
Max. :20.00 Max. :6.000 Max. :9.000
NA's :2702 NA's :7530
ad_promo flat_balcony flat_utility_room flat_garage flat_basement flat_garden
Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical
FALSE:13814 FALSE:7608 FALSE:15386 FALSE:11794 FALSE:13585 FALSE:15647
TRUE :2634 TRUE :8840 TRUE :1062 TRUE :4654 TRUE :2863 TRUE :801
flat_tarrace flat_lift flat_two_level flat_kitchen_sep flat_air_cond flat_nonsmokers
Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical
FALSE:15129 FALSE:9297 FALSE:16299 FALSE:11918 FALSE:15851 FALSE:14133
TRUE :1319 TRUE :7151 TRUE :149 TRUE :4530 TRUE :597 TRUE :2315
flat_washmachine flat_dishwasher flat_fridge flat_cooker flat_oven flat_tv_device
Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical
FALSE:7982 FALSE:11869 FALSE:7414 FALSE:7774 FALSE:9109 FALSE:13255
TRUE :8466 TRUE :4579 TRUE :9034 TRUE :8674 TRUE :7339 TRUE :3193
flat_internet flat_television flat_phone flat_anti_blinds flat_anti_doors_windows flat_intercom
Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical Mode :logical
FALSE:9040 FALSE:9917 FALSE:13948 FALSE:15209 FALSE:12909 FALSE:7045
TRUE :7408 TRUE :6531 TRUE :2500 TRUE :1239 TRUE :3539 TRUE :9403
flat_monitoring flat_alarm_sys flat_closed_area quarter cena_m2 centrum
Mode :logical Mode :logical Mode :logical Length:16448 Min. : 1.00 Mode :logical
FALSE:12294 FALSE:16064 FALSE:12377 Class :character 1st Qu.: 31.25 FALSE:14849
TRUE :4154 TRUE :384 TRUE :4071 Mode :character Median : 37.50 TRUE :1599
Mean : 39.39
3rd Qu.: 45.65
Max. :750.00
rataje price_log area_log kawalerka price_tanie_drogie flat_area_cut
Mode :logical Min. : 5.521 Min. :1.435 Mode :logical Length:16448 (0, 10] : 84
FALSE:15224 1st Qu.: 7.208 1st Qu.:3.555 FALSE:13563 Class :character (10, 20]: 524
TRUE :1224 Median : 7.378 Median :3.828 TRUE :2885 Mode :character (20, 30]: 1938
Mean : 7.411 Mean :3.785 30+ :13902
3rd Qu.: 7.601 3rd Qu.:4.007
Max. :10.714 Max. :7.696
# Summary statistics of the price column alone.
summary(rent[["price"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
250 1350 1600 1763 2000 45000
Wyznaczam średnią cenę i powierzchnię
# Column means via apply(); MARGIN = 2 applies the function over columns.
apply(X = rent[, c("price", "flat_area")], MARGIN = 2, FUN = mean)
price flat_area
1762.62117 47.74883
# Same means via sapply(), which simplifies the result to a named vector.
sapply(X = rent[, c("price", "flat_area")], FUN = mean)
price flat_area
1762.62117 47.74883
# Same means via lapply(), which returns a list with one element per column.
lapply(X = rent[, c("price", "flat_area")], FUN = mean)
$price
[1] 1762.621
$flat_area
[1] 47.74883
Wyznaczam średnią cenę według dzielnicy
# Mean price per quarter.
aggregate(price ~ quarter, data = rent, FUN = mean)
# Mean price and mean flat area per quarter.
aggregate(cbind(price, flat_area) ~ quarter, data = rent, FUN = mean)
# As above, with the result columns named cena and pow.
aggregate(
  cbind(cena = price, pow = flat_area) ~ quarter,
  data = rent,
  FUN = mean
)
# Same two means, grouped by quarter and by flat_for_students.
aggregate(
  cbind(cena = price, pow = flat_area) ~ quarter + flat_for_students,
  data = rent,
  FUN = mean
)
NA
NA
Ćwiczenie:
res